*Restrict the dataset to the variables used in the analysis.
*The list is written with ";" as command delimiter so it can span several lines;
*the default carriage-return delimiter is restored right after the command.
#delimit ;
keep
    /* logistics */
    ind_id new_ind_id hh_id edate day_of_week month year emy area_id
    vnv vnv_base
    /* demographics */
    age female spouse
    /* income */
    total_income reg_income irr_income salary
    /* investment income */
    int_inc dividend
    /* financial balances and limits */
    sa_bal ca_bal ccard_bal sa_lim ca_lim ccard_lim
    /* financial costs */
    total_fin_cost drattarv fit kostn2 utvextir
    /* liquidity */
    cash liquidity
    /* debt */
    od_amount payday_loan
    /* variables with the _count suffix */
    sa_count ca_count ccard_count drattarv_count fit_count kostn_count utvextir_count
    /* expenditure supercategories */
    total_exp nec unnec
    /* expenditure categories */
    groc fuel pharm ca alc rmf group recr
    lottery_exp lottery_char_exp gambling_exp gaming cinema books craft fish
    recr_area special sa ts swim toys
    hi hs transp educ media charities
    new_car holiday
    /* benefits */
    benefits inv_ben ub
    /* windfalls */
    loan_wo taxes stud_loan lottery_inc lottery_char_inc gambling_inc ins_claim
    /* logistical information */
    cr_utiliz
    total_logins log smart_log desktop_log tabloid_log unknown_log
;
#delimit cr

*Number the distinct values of edate consecutively (1 = smallest edate)
sort edate ind_id
egen edate_id = group(edate)

*Restrict the sample to observations after 1 September 2014.
*The cutoff 1334 is the date number matching that date in this dataset
*(NOTE(review): it depends on which dates are present in the data - confirm if the input changes).
drop if edate_id <= 1334

*Constant 1 on every row, so that rows can be counted by summing it
generate dummy = 1

*Per individual, count the days with a valid (= non-missing) cash balance.
*Rows whose cash is missing are excluded by the if qualifier and get a missing agg_dummy.
*total() is the documented egen function; sum() is its old, undocumented name.
by new_ind_id, sort: egen agg_dummy = total(dummy) if cash != .

*Keep rows of individuals with at least 884 valid days
*(884 = number of days from 1 Sept 2014 to 31 Jan 2017).
*Rows with a missing agg_dummy, i.e. rows with a missing cash balance, are dropped too.
keep if agg_dummy >= 884 & agg_dummy < .

*Count the number of household members (rows) on a given day:
*hh_edate identifies each household-day combination, twohhdum is the row count within it.
*total() is the documented egen function; sum() is its old, undocumented name.
sort hh_id edate
egen hh_edate = group(hh_id edate)
by hh_edate, sort: egen twohhdum = total(dummy)

*twomemb = 1 on household-days with exactly two rows, 0 otherwise
generate twomemb = (twohhdum == 2)

*Whether the individual has EVER been in a 2-member household
*NOTE(review): groups by ind_id, whereas the balance count above groups by new_ind_id - confirm this is intended
by ind_id, sort: egen everin2memberhh = max(twomemb)

*Whether the individual has ALWAYS been in a 2-member household.
*agg_dummy_hh is the total number of rows of the household over the whole sample window.
*total() is the documented egen function; sum() is its old, undocumented name.
by hh_id, sort: egen agg_dummy_hh = total(dummy)

*alwaysin2memberhh = 1 if the household has exactly 1768 rows (= 2 x 884 days),
*                  = 0 if it has fewer,
*                  = missing if it has more than 1768 rows.
*No explicit missing-value guard is needed: a missing count satisfies neither condition.
generate alwaysin2memberhh = (agg_dummy_hh == 1768) if agg_dummy_hh <= 1768

*Drop individuals who were only temporarily in a 2-member household
drop if alwaysin2memberhh == 0 & everin2memberhh == 1

*Drop households with fewer than 884 rows.
*Original author's check (codebook ind_id if agg_dummy_hh < 884): there are 21 individuals for whom id changes
drop if agg_dummy_hh < 884

*Overview of the sample 
*codebook hh_id if alwaysin2memberhh ==0 //10,169 households with 1 individual always
*codebook ind_id if alwaysin2memberhh ==0 //10,169 individuals always in 1-member hh
*codebook hh_id if alwaysin2memberhh ==1 //691 households with individuals who have always been linked

*Note: the final sample consists of 11,551 individuals (10,169 in 1-member households + 2 x 691 in always-linked 2-member households)


